reductionFactor <- 1/2
nbootiterations <- 40
}
if(bootstrapping == FALSE){
reductionFactor <- 1/20
nbootiterations <- 2
}
#----- Analysis starts here ------------
for(this.unit in c('cm', 'pgm')){
# Loop through the "steps". The steps are specific to the ViEWS competition. They refer to how far ahead I want to predict. Step 2 =2 months ahead, etc.
for(step in c(2:7)){
print('')
print('')
print(paste('STEP', step))
for(bootit in 21:nbootiterations){
print(paste('BOOT ITERATION', bootit))
# 1. Cut data into sequences
source('CODE/prepSequences.R')
# 2. calculate distance between sequences (for each step),
# Saves predictions to predictions.csv
source('CODE/calcDistances.R')
}
}
# 3. Import the predictions, calibrate using a simple lm on past data, and predict future data. Output a file of form 'predictionsChadefaux_cm_s5_forecastSet.csv'.
# Also output a Predictions/results.csv, which reports various metrics such as MSE
source('CODE/distancesToPred.R')
# NOTE: the below is only relevant for the ViEWS competition.
# 4. At this stage, all the important steps have been completed.
# The below is just a bit of tidying up
# First, merge all `prediction' files into a single csv file, with each step as one column, as per ViEWS instructions
source('CODE/mergeFiles.R')
# 5. just a bit of tidying up the files now
source('CODE/cleanUp.R')
}
for(bootit in 30:nbootiterations){
print(paste('BOOT ITERATION', bootit))
# 1. Cut data into sequences
source('CODE/prepSequences.R')
# 2. calculate distance between sequences (for each step),
# Saves predictions to predictions.csv
source('CODE/calcDistances.R')
}
1
for(this.step in c(2)){
df.boots <- NULL
for(booti in 1:nbootiterations){
# Import the prediction file
preds <- read_csv(paste('Results/RawPredictions/predictions_', this.unit,'_s', this.step, '_boot', booti, '.csv', sep=''))
preds <- as.data.frame(preds)
# name and format columns
colnames(preds) <- c('datePredMade' ,
'method',
'prediction',
'unitYM'  # date of last obs in the seq for that unit
)
preds$datePredMade <- NULL # remove this, was useful to track the time the file was changed, but now is just taking up precious memory
preds$unitYM <- as.character(preds$unitYM)
preds$method <- as.character(preds$method)
# split unitYM, a character vector, into its components
mypreds <- preds %>%
separate(unitYM, c("country_name", "cutDel", 'cutnDel', 'yearDel', 'year',
'MonthDel', 'month', 'IDdel', 'unit'), "_", remove=FALSE)
mypreds$cutDel <- mypreds$yearDel <- mypreds$MonthDel <- mypreds$cutnDel <- mypreds$IDdel <- NULL
mypreds$unitYM <- NULL
mypreds$country_name <- NULL
mypreds$year <- as.numeric(mypreds$year)
mypreds$month <- as.numeric(mypreds$month)
mypreds$unit <- as.numeric(mypreds$unit)
mypreds <- mypreds[order(mypreds$unit, mypreds$year, mypreds$month),]
# Merge it back into main data, df1, to get info about the "future"
df1 <- load.data(step = this.step, this.unit = this.unit)
dfa <- df1[,c( 'unit', 'month_id', 'future', 'year', 'month', 'in_africa')]
dfa$unit <- as.numeric(dfa$unit)
dfa$year <- as.numeric(dfa$year)
dfa$month <- as.numeric(dfa$month)
df4 <- merge(mypreds,
dfa,
by=c('unit', 'year', 'month'),
all.x=T)
# remove the dots in variable names; clean a bit
names(df4) <- gsub("\\.", "_", names(df4))
df4 <- df4[!is.na(df4$unit) & !is.na(df4$prediction),]
df4 <- df4[order(df4$unit, df4$month_id),]
df4.boot <- df4
df.boots <- rbind(df.boots, df4.boot)
# Define learning and testing samples, as per Views instructions
forecast.set1 <- c(408, 444) - this.step
forecast.set2 <- c(444, 480) - this.step
forecast.set3 <- c(487, 487 + this.step + 1) - this.step
for(forecast.set in list(forecast.set1, forecast.set2, forecast.set3)){
print(forecast.set)
df5 <- df4 # no purpose, just legacy
df5$learn <- 0
df5$learn[df5$month_id >= 121& df5$month_id <= forecast.set[1]] <- 1
df5$test <- 0
df5$test[df5$month_id > forecast.set[1] & df5$month_id <= forecast.set[2]] <- 1
# Estimate a model with only a constant: y ~ alpha. This is just to compare our predictions (below) to a basic benchmark.
lm.cons <- speedlm(future ~ 1, data=df5[df5$learn==1,], na.action = na.exclude)
# estimate y ~ alpha + yhat, where yhat is the estimate based on the DTW analysis.
# This step is useful to calibrate our predictions from the previous step
formula.shape <- as.formula(future ~ prediction)
lm.shape <- speedlm(formula.shape, data = df5[df5$learn==1,], na.action = na.exclude)
df5$pred.shape <-df5$pred.cons <- NA
# predict each model on the testing set
df5$pred.shape[df5$test==1] <- predict(lm.shape, newdata = df5[df5$test==1,])
df5$pred.cons[df5$test==1] <- predict(lm.cons, newdata = df5[df5$test==1,])
# report Out-of-sample performance metrics
# First, MSE:
with(df5[!is.na(df5$pred.shape) & !is.na(df5$pred.cons),],
print(paste('shape: ', mean((df5$future - df5$pred.shape)^2, na.rm=T))))
with(df5[!is.na(df5$pred.shape) & !is.na(df5$pred.cons),],
print(paste('shape (africa): ', mean((df5$future[df5$in_africa==1] - df5$pred.shape[df5$in_africa==1])^2, na.rm=T))))
with(df5[!is.na(df5$pred.shape) & !is.na(df5$pred.cons),],
print(paste('constant: ', mean((df5$future - df5$pred.cons)^2, na.rm=T))))
with(df5[!is.na(df5$pred.shape) & !is.na(df5$pred.cons),],
print(paste('constant (Africa): ', mean((df5$future[df5$in_africa==1] - df5$pred.cons[df5$in_africa==1])^2, na.rm=T))))
# Second, TADDA:
with(df5[!is.na(df5$pred.shape)  & !is.na(df5$pred.cons) ,],
print(paste('tadda ens: ', tadda(pred = df5$pred.shape, obs = df5$future, epsilon = 0.048))))
with(df5[!is.na(df5$pred.shape)  & !is.na(df5$pred.cons) ,],
print(paste('tadda constant:', tadda(pred = df5$pred.cons, obs = df5$future, epsilon = 0.048))))
# Export the data to transfer to ViEWS team
df.toexport <- data.frame(unit_id =  df5$unit,
unit_type = this.unit,
step = this.step,
month_id = df5$month_id + this.step,
last_month_id_of_data_used = df5$month_id,
prediction = df5$pred.shape
)
df.toexport <- df.toexport[!is.na(df.toexport$prediction), ]
colnames(df.toexport)[colnames(df.toexport)=='prediction'] <- paste('chadefaux_s',this.step,sep='' )
# Note: this ignores the bootstraps. Need to fix!
write.csv(df.toexport, file = paste('Results/PostPredictions/predictionsChadefaux_',this.unit,'_s', this.step,'_', forecast.set[1]+this.step,'.csv', sep=''))
# Calculate and export various metrics
results <- data.frame(unit = this.unit,
this.step = this.step,
forecastSet = forecast.set[1],
model = c('shape', 'cons'),
mse = c( mean((df5$future - df5$pred.shape)^2, na.rm=T),
mean((df5$future - df5$pred.cons)^2, na.rm=T))  ,
mse.africa = c( mean((df5$future[df5$in_africa==1] - df5$pred.shape[df5$in_africa==1])^2, na.rm=T),
mean((df5$future[df5$in_africa==1] - df5$pred.cons[df5$in_africa==1])^2, na.rm=T))  ,
tadda = c(tadda(pred = df5$pred.shape, obs = df5$future, epsilon = 0.048), tadda(pred = df5$pred.cons, obs = df5$future, epsilon = 0.048)),
tadda.africa = c(tadda(pred = df5$pred.shape[df5$in_africa==1], obs = df5$future[df5$in_africa==1], epsilon = 0.048), tadda(pred = df5$pred.cons, obs = df5$future, epsilon = 0.048))
)
print(results)
if(bootstrapping==TRUE){
# just write two sets of files, might be easier to manipulate later in a single file, not sure yet
# separate files for each boot iteration:
write.table(results, file = paste('Results/Metrics/results_boot', booti, '.csv', sep=''), append = T, sep=',' , row.names = F, col.names = F)
#one common file (redundant but can be useful):
write.table(results, file = 'Results/Metrics/resultsBoot.csv', append = T, sep=',' , row.names = F, col.names = F)
}
if(bootstrapping==FALSE) write.table(results, file = 'Results/Metrics/results.csv', append = T, sep=',' , row.names = F, col.names = F)
}
}
}
## Aggregate the bootstrapped results to get CIs
df.boot.with.CI <- aggregate(data.frame(prediction = df.boots$prediction),
by=list(unit = df.boots$unit,
month_id= df.boots$month_id),
FUN=function(x) quantile(x, probs = c(0.25, 0.5, 0.75)))
# Need to merge back into df.boot to get variables such as future etc back. But first I need to remove the duplicates in df.boots
tmp <- df.boots
tmp$prediction <- NULL
tmp <- tmp[!duplicated(tmp),]
## Finally get the data with upper and lower CI
df.boots.all <- merge(df.boot.with.CI, tmp, by=c('unit', 'month_id'), all.x=T)
# compare with a random guess of 0 +/- the same interval as the one in my bootstrapped range
df.boots.all$random <- 0
df.boots.all$random.hi <- df.boots.all$random + abs(df.boots.all$prediction[,3] - df.boots.all$prediction[,1])/2
df.boots.all$random.lo <- df.boots.all$random - abs(df.boots.all$prediction[,3] - df.boots.all$prediction[,1])/2
# What is the probability the truth falls within the  bounds?
df.boots.all <- df.boots.all[!is.na(df.boots.all$future),]
length(which(df.boots.all$future >= df.boots.all$prediction[,1] & df.boots.all$future <= df.boots.all$prediction[,3])) / nrow(df.boots.all)
length(which(df.boots.all$future >= df.boots.all$random.lo & df.boots.all$future <= df.boots.all$random.hi ))/nrow(df.boots.all)
## Confusion matrix
#proba that it increases if I say it increases
up <- 0
nrow(df.boots.all[df.boots.all$prediction[,2] > up & df.boots.all$future>up,])
#I say it increases, but 0 happens
nrow(df.boots.all[df.boots.all$prediction[,2] > up & df.boots.all$future==0,])
#I say it increases, but it decreases
nrow(df.boots.all[df.boots.all$prediction[,2] > up & df.boots.all$future<0,])
#I say 0 but it increases
nrow(df.boots.all[df.boots.all$prediction[,2] ==0 & df.boots.all$future>0,])
# I say 0 and it is 0
nrow(df.boots.all[df.boots.all$prediction[,2] ==0 & df.boots.all$future==0,])
# I say 0 and it decreases
nrow(df.boots.all[df.boots.all$prediction[,2] ==0 & df.boots.all$future<0,])
# I say decreases but it increases
nrow(df.boots.all[df.boots.all$prediction[,2] <0 & df.boots.all$future>0,])
# I say decreases but it is 0
nrow(df.boots.all[df.boots.all$prediction[,2] <0 & df.boots.all$future==0,])
# I say decreases and it does decreases
nrow(df.boots.all[df.boots.all$prediction[,2] <0 & df.boots.all$future<0,])
nrow(df.boots.all)
df.boots.all$xrough <- round(df.boots.all$prediction[,2]*10)/10
df.boots.all$futurerough <- round(df.boots.all$future*10)/10
ggplot(df.boots.all[df.boots.all$xrough!=0 & df.boots.all$futurerough!=0,],aes(x=xrough, y=futurerough)) +
stat_binhex()
1
ggplot(df.boots.all[df.boots.all$xrough!=0 & df.boots.all$futurerough!=0,],aes(x=xrough, y=futurerough)) +
stat_binhex()
dev.off()
dev.off()
dev.off()
ggplot(df.boots.all[df.boots.all$xrough!=0 & df.boots.all$futurerough!=0,],aes(x=xrough, y=futurerough)) +
stat_binhex()
ggplot(df.boots.all,aes(x=xrough, y=futurerough)) +
stat_binhex()
ggplot(df.boots.all[!(df.boots.all$xrough==0 & df.boots.all$futurerough==0),],aes(x=xrough, y=futurerough)) +
stat_binhex()
lm1 <- lm(df.boots.all$future ~  df.boots.all$prediction[,2],)
conf <- predict(lm1, interval='confidence')
intercept1 <- lm1$coefficients[1]
slope1 <- lm1$coefficients[2]
test <- df.boots.all[df.boots.all$future!=0,]
o5 <- ggplot(test, mapping = aes(prediction[,2], future)) +
geom_hex() +
scale_fill_viridis_c() +
geom_point(shape = '.', col = 'white')
o5 + geom_abline(intercept = intercept1, slope = slope1 , color='red')+xlim(-0.5,0.5)+ylim(-3.5, 3.5)
# MSEs with uncertainty
if(bootstrapping==TRUE){
these.preds <- NULL
for(bootnb in 1:nbootiterations){
print(bootnb)
this.pred <- read.csv(paste('Results/Metrics/results_boot',
bootnb, '.csv', sep=''))
colnames(this.pred) <- c('unit', 'step', 'forecastSet', 'model', 'mse', 'mse.africa', 'tadda', 'taddaAfrica')
these.preds <- rbind(these.preds, this.pred)
}
## Aggregate the bootstrapped results to get CIs
these.preds.with.ci <- aggregate(data.frame(mses = these.preds$mse.africa),
by=list(forecastSet = these.preds$forecastSet,
model= these.preds$model,
step= these.preds$step,
unit = these.preds$unit),
FUN=function(x) quantile(x, probs = c(0.05, 0.5, 0.95), na.rm=T))
these.preds.with.ci[these.preds.with.ci$forecastSet==442, ]
}
## Aggregate the bootstrapped results to get CIs
these.preds.with.ci <- aggregate(data.frame(mses = these.preds$mse.africa),
by=list(forecastSet = these.preds$forecastSet,
model= these.preds$model,
step= these.preds$step,
unit = these.preds$unit),
FUN=function(x) quantile(x, probs = c(0.25, 0.5, 0.75), na.rm=T))
these.preds.with.ci[these.preds.with.ci$forecastSet==442, ]
## Aggregate the bootstrapped results to get CIs
these.preds.with.ci <- aggregate(data.frame(mses = these.preds$mse.africa, taddas = these.preds$taddaAfrica),
by=list(forecastSet = these.preds$forecastSet,
model= these.preds$model,
step= these.preds$step,
unit = these.preds$unit),
FUN=function(x) quantile(x, probs = c(0.25, 0.5, 0.75), na.rm=T))
these.preds.with.ci[these.preds.with.ci$forecastSet==442, ]
plot(these.preds.with.ci$mses[,2])
with(these.preds.with.ci, plot(mses[forecastSet==442,2]))
these.preds.with.ci
with(these.preds.with.ci, plot(mses[forecastSet==442,2]))
these.preds.with.ci[these.preds.with.ci$forecastSet==442, ]
gplot(these.preds.with.ci,
aes(x = model, y = mses[,2]))
ggplot(these.preds.with.ci,
aes(x = model, y = mses[,2]))
ggplot(these.preds.with.ci,
aes(x = model, y = mses[,2])) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci[forecastSet==442,],
aes(x = model, y = mses[,2])) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci[these.preds.with.ci$forecastSet==442,],
aes(x = model, y = mses[,2])) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
head(these.preds.with.ci)
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c("forecastSet","model", 'step', 'unit'))
library(reshape)
install.packages('reshape')
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c("forecastSet","model", 'step', 'unit'))
library(reshape)
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c("forecastSet","model", 'step', 'unit'))
these.preds.with.ci.rs
ggplot(these.preds.with.ci[these.preds.with.ci$forecastSet==442,],
aes(x = variable, y = mses[,2], colour = model)) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c("forecastSet","model", 'step', 'unit'))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$forecastSet==442,],
aes(x = variable, y = mses[,2], colour = model)) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
these.preds.with.ci.rs
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$forecastSet==442,],
aes(x = variable, y = value, colour = model)) +
geom_errorbar(aes(ymax = mses[,3], ymin = mses[,1]),
position = "dodge") +
geom_point(position = position_dodge(0.9))
?melt
these.preds.with.ci$mse.max <- these.preds.with.ci$mses[,3]
these.preds.with.ci$mse.max <- these.preds.with.ci$mses[,3]
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c('mse.max', "forecastSet","model", 'step', 'unit'))
head(these.preds.with.ci)
head(these.preds.with.ci.rs)
these.preds.with.ci$mse.max <- NULL
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c('mse.max', "forecastSet","model", 'step', 'unit'))
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c( "forecastSet","model", 'step', 'unit'))
head(these.preds.with.ci.rs)
head(these.preds.with.ci)
names(these.preds.with.ci.rs)
head(these.preds.with.ci.rs)
names(these.preds.with.ci.rs)
names(these.preds.with.ci.rs)[names(these.preds.with.ci.rs)=='NA'] <- 'valueMedian'
names(these.preds.with.ci.rs)
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c( "forecastSet","model", 'step', 'unit'))
names(these.preds.with.ci.rs)[names(these.preds.with.ci.rs)=='value'] <- 'valueLo'
names(these.preds.with.ci.rs)[names(these.preds.with.ci.rs)=='NA'] <- c('valueMedian', 'valueHi')
names(these.preds.with.ci.rs)[(length(these.preds.with.ci.rs)-1):(length(these.preds.with.ci.rs))] <- c('valueMedian', 'valueHi')
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c( "forecastSet","model", 'step', 'unit'))
names(these.preds.with.ci.rs)[names(these.preds.with.ci.rs)=='value'] <- 'valueLo'
names(these.preds.with.ci.rs)[(length(these.preds.with.ci.rs)-1):(length(these.preds.with.ci.rs))] <- c('valueMedian', 'valueHi')
head(these.preds.with.ci.rs)
these.preds.with.ci.rs <- melt(these.preds.with.ci, id=c( "forecastSet","model", 'step', 'unit'))
names(these.preds.with.ci.rs)[names(these.preds.with.ci.rs)=='value'] <- 'valueLo'
names(these.preds.with.ci.rs)[(length(these.preds.with.ci.rs)-1):(length(these.preds.with.ci.rs))] <- c('valueMedian', 'valueHi')
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$forecastSet==442,],
aes(x = variable, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
head(these.preds.with.ci)
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses', these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
?geom_errorbar
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "identity") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge", width=0.25) +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position = "dodge") +
geom_point(position = position_dodge(0.8))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(0.9)) +
geom_point(position = position_dodge(0.9))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1.1)) +
geom_point(position = position_dodge(1.1))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_bw()
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_light()
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_void()
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_minimal()
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet==442,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_classic()
these.preds.with.ci.rs$valueHi-these.preds.with.ci.rs$valueLo
these.preds.with.ci.rs
these.preds.with.ci.rs
ggplot(these.preds.with.ci.rs[these.preds.with.ci.rs$variable=='mses'& these.preds.with.ci.rs$forecastSet>420& these.preds.with.ci.rs$forecastSet<460,],
aes(x = step, y = valueMedian, colour = model)) +
geom_errorbar(aes(ymax = valueHi, ymin = valueLo),
position =position_dodge(1)) +
geom_point(position = position_dodge(1))+
theme_classic()
names(these.preds.with.ci.rs)
these.preds.with.ci.rs
these.preds.with.ci.rs
these.preds
table(these.preds$step)
# Loop through the "steps". The steps are specific to the ViEWS competition. They refer to how far ahead I want to predict. Step 2 =2 months ahead, etc.
for(step in c(4:7)){
print('')
print('')
print(paste('STEP', step))
for(bootit in 30:nbootiterations){
print(paste('BOOT ITERATION', bootit))
# 1. Cut data into sequences
source('CODE/prepSequences.R')
# 2. calculate distance between sequences (for each step),
# Saves predictions to predictions.csv
source('CODE/calcDistances.R')
}
}
# Loop through the "steps". The steps are specific to the ViEWS competition. They refer to how far ahead I want to predict. Step 2 =2 months ahead, etc.
for(step in c(4:7)){
print('')
print('')
print(paste('STEP', step))
for(bootit in 41:nbootiterations){
print(paste('BOOT ITERATION', bootit))
# 1. Cut data into sequences
source('CODE/prepSequences.R')
# 2. calculate distance between sequences (for each step),
# Saves predictions to predictions.csv
source('CODE/calcDistances.R')
}
}
# IMPORTANT: Set your own working directory here:
setwd('~/Dropbox/viewsForecastingChallenge/CLEAN/VIEWS_Replication_Chadefaux_bootstrapAtEnd/')
rm(list=ls())
set.seed(0)
# Load functions
source('CODE/Functions/tadda.R')
source('CODE/load.data.R')
library(arrow) # useful to import parquet files from Python
library(TSclust) # may need to reinstall rgl and knitr
library(zoo)
library(readr)
library(dplyr)
library(tidyr)
library(speedglm)
library(ggplot2)
# Important: Choose pgm or cm below
#this.unit <- 'cm'
this.unit <- 'pgm'
# Do I want updates on MSE progression?
printUpdates <- FALSE
#----- Analysis starts here ------------
for(this.unit in c('cm', 'pgm')){
# Loop through the "steps". The steps are specific to the ViEWS competition. They refer to how far ahead I want to predict. Step 2 =2 months ahead, etc.
for(step in c(4:7)){
print('')
print('')
print(paste('STEP', step))
# 1. Cut data into sequences
source('CODE/prepSequences.R')
# 2. calculate distance between sequences (for each step),
# Saves predictions to predictions.csv
source('CODE/calcDistances.R')
}
# 3. Import the predictions, calibrate using a simple lm on past data, and predict future data. Output a file of form 'predictionsChadefaux_cm_s5_forecastSet.csv'.
# Also output a Predictions/results.csv, which reports various metrics such as MSE
source('CODE/distancesToPred.R')
# NOTE: the below is only relevant for the ViEWS competition.
# 4. At this stage, all the important steps have been completed.
# The below is just a bit of tidying up
# First, merge all `prediction' files into a single csv file, with each step as one column, as per ViEWS instructions
source('CODE/mergeFiles.R')
# 5. just a bit of tidying up the files now
source('CODE/cleanUp.R')
}
